package crawler;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.pipeline.ConsolePipeline;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * WebMagic {@link PageProcessor} that downloads the Zhihu home page and extracts
 * the text of every {@code span} element whose {@code class} attribute is
 * exactly {@code RichText}.
 *
 * <p>Run {@link #main(String[])} to start a single-threaded crawl whose results
 * are printed by a {@link ConsolePipeline}.
 */
public class ZhiHuProcessor implements PageProcessor{

	/**
	 * Seed URL for the crawl. It must be an absolute URL including the scheme:
	 * a bare host name such as {@code www.zhihu.com} cannot be resolved to a
	 * target host by the HTTP client, so the download would fail.
	 */
	private static final String START_URL = "https://www.zhihu.com";

	/** XPath selecting the text of every {@code span} with class {@code RichText}. */
	private static final String RICH_TEXT_XPATH = "//span[@class='RichText']/text()";

	/** Result-field name under which the extracted texts are stored. */
	private static final String RICH_TEXT_FIELD = "RichText";

	/**
	 * Crawl configuration: 3 retries, 100 ms pause between requests, 1000 ms
	 * timeout, plus the cookies and browser-like headers sent with every request.
	 *
	 * <p>Each cookie name appears once. Site keeps default cookies in a map keyed
	 * by name, so when a name is added twice only the last value is sent; the
	 * entries below carry those effective values.
	 *
	 * <p>NOTE(review): the cookie values appear to be a browser session captured
	 * by hand. They will expire and should be treated as credentials - consider
	 * loading them from external configuration instead of committing them.
	 */
	private final Site site = Site.me().setRetryTimes(3).setSleepTime(100).setTimeOut(1000)
	.setDomain("www.zhihu.com")
	.addCookie("_zap", "2945a431-b47c-4f50-9945-2c3fd6f89513")
	.addCookie("d_c0", "AFBCfnjyVwyPTkYXNjp8Q28mHTuGcJzKaus=|1504840463")
	.addCookie("q_c1", "9e4d0a03447a42f6a93b0be8db1a764b|1506671886000|1502699601000")
	.addCookie("aliyungf_tc", "AQAAAOYR+hJuewoAG/omG9L2BUNq2BYh")
	.addCookie("_xsrf", "4fe3fe6f06ba77d4434b91bd4d9415bf")
	.addCookie("_ga", "GA1.2.255091299.1504840218")
	.addCookie("_gid", "GA1.2.1551959581.1506652863")
	.addCookie("l_cap_id", "MWM5ZmNjZjk2MWFjNGRkNDhjOWZhY2ViZDUyNjBhOWE=|1506671757|9707e6977c0ac94285b7d0f3df667b82e696a87e")
	.addCookie("r_cap_id", "OGNiOGJkMDc1MTJhNDUwMjhkN2JlMjllMzM1MTJlNGE=|1506671757|bb9deece16257cb82625bbe14625671cf8c154da")
	.addCookie("cap_id", "NWNhZGM5ZmI4OThkNDhlNzk5ODZkMjU2OGY2MzRhZTU=|1506671757|394a3be7c1f0e9b947a2a949e606a1bcdcead6ed")
	.addCookie("z_c0", "Mi4xcllJUkJnQUFBQUFBVUVKLWVQSlhEQmNBQUFCaEFsVk5zb24xV1FBaS1NMjI3Z1g0WGRSV0cxRU85WTlObC1GbWZR|1506671794|ce526ca3705678a924f51d16a7825490546c834f")
	.addCookie("__utma", "51854390.255091299.1504840218.1506671791.1506671791.1")
	.addCookie("__utmc", "51854390")
	.addCookie("__utmz", "51854390.1506671791.1.1.utmcsr=zhihu.com|utmccn=(referral)|utmcmd=referral|utmcct=/")
	.addCookie("__utmv", "51854390.100--|2=registration_date=20170929=1^3=entry_date=20170814=1")
	.addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8")
	// "br" is deliberately not advertised: the Apache HttpClient 4.x used by
	// WebMagic's default downloader decodes gzip and deflate only, so a Brotli
	// response would reach the XPath parser as undecoded bytes.
	.addHeader("Accept-Encoding", "gzip, deflate")
	.addHeader("Accept-Language", "zh-CN,zh;q=0.8")
	.addHeader("Cache-Control", "max-age=0")
	.addHeader("Connection", "keep-alive")
	.addHeader("Host", "www.zhihu.com")
	.addHeader("Upgrade-Insecure-Requests", "1")
	.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36");

	/**
	 * Returns the crawl configuration used for every request.
	 *
	 * @return the site settings (retries, delays, cookies and headers)
	 */
	@Override
	public Site getSite() {
		return site;
	}

	/**
	 * Extracts the rich-text spans from a downloaded page and stores all matches
	 * as a list under the {@code "RichText"} result field.
	 *
	 * @param page the downloaded page to extract from
	 */
	@Override
	public void process(Page page) {
		page.putField(RICH_TEXT_FIELD, page.getHtml().xpath(RICH_TEXT_XPATH).all());
	}

	/**
	 * Starts a single-threaded crawl of {@link #START_URL} and prints the
	 * extracted fields to the console. Blocks until the crawl finishes.
	 *
	 * @param args command-line arguments (unused)
	 */
	public static void main(String[] args) {
		Spider.create(new ZhiHuProcessor())
		.addUrl(START_URL)
		.addPipeline(new ConsolePipeline())
		.thread(1)
		.run();
	}
}
